import os, sys, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
sys.path.append('..')
from prediction_denoise import prediction, predict
from glob import glob
import numpy as np
import librosa
import IPython.display as ipd
from signal_utils import audio_files_to_numpy, numpy_audio_to_matrix_spectrogram, snr_db, psnr, apply_noise
from data_plot import plot_spectrogram, plot_3_spectograms
import matplotlib.pyplot as plt
List the noisy validation recordings that will be denoised.
# Collect the base names (directory stripped) of the noisy validation recordings.
# The pattern is assembled with os.path.join so it is not tied to Windows path
# separators, and the result is sorted because glob() makes no ordering
# guarantee while later cells select recordings by list index.
noisy_voices_list = sorted(
    os.path.basename(voice)
    for voice in glob(os.path.join('..', 'data', 'validation', 'noisy_voice', '*'))
)
print(noisy_voices_list)
['karol_birds.wav', 'karol_klawiatura.wav', 'karol_myszka.wav', 'marcin_klawiatura.wav']
# Run the denoiser once per listed recording, requesting an output name that
# is the input name with a "pred_" prefix.
for noisy_name in noisy_voices_list:
    predict(
        audio_input_prediction=noisy_name,
        audio_output_prediction=f'pred_{noisy_name}',
    )
Loaded model from disk 128 (22, 128, 128) (22, 128, 128) 8064 63 Loaded model from disk 128 (22, 128, 128) (22, 128, 128) 8064 63 Loaded model from disk 128 (18, 128, 128) (18, 128, 128) 8064 63 Loaded model from disk 128 (5, 128, 128) (5, 128, 128) 8064 63
# Collect the base names (directory stripped) of the files in the prediction
# output folder. The pattern is assembled with os.path.join so it is not tied
# to Windows path separators, and the result is sorted because glob() makes no
# ordering guarantee while later cells pair this list with the noisy list by
# index.
pred_voices_list = sorted(
    os.path.basename(voice)
    for voice in glob(os.path.join('..', 'data', 'validation', 'save_prediction', '*'))
)
print(pred_voices_list)
['pred_karol_birds.wav', 'pred_karol_klawiatura.wav', 'pred_karol_myszka.wav', 'pred_marcin_klawiatura.wav']
# Load the noisy recording at index 3 and the prediction at the same index,
# both requested at 8000 Hz, then embed an audio player for each.
# Paths are joined component by component so they are valid on any platform.
real_noisy, sr = librosa.load(
    os.path.join('..', 'data', 'validation', 'noisy_voice', noisy_voices_list[3]),
    sr=8000,
)
real_pred, sr = librosa.load(
    os.path.join('..', 'data', 'validation', 'save_prediction', pred_voices_list[3]),
    sr=8000,
)

# Play back at the sample rate returned by librosa.load rather than repeating
# the literal, so the two cannot drift apart.
print('Noisy voice')
ipd.display(ipd.Audio(real_noisy, rate=sr))
print('Predicted voice')
ipd.display(ipd.Audio(real_pred, rate=sr))
Noisy voice
Predicted voice
# Compute the dB spectrogram (and phase) of the noisy and predicted signals
# and plot both for visual comparison.
hop_length_fft = 63

# `numerator` is twice the number of whole hops that fit in the signal; the
# size arguments passed below are derived from it so they scale with the
# recording length.
# NOTE(review): the 2nd and 3rd arguments are presumably the spectrogram side
# length and n_fft - confirm against signal_utils.
numerator = (real_noisy.shape[0] // hop_length_fft) * 2
real_noisy_db, real_noisy_pha = numpy_audio_to_matrix_spectrogram(
    real_noisy.reshape(1, real_noisy.shape[0]),
    int(numerator / 2) + 1,
    numerator + 1,
    hop_length_fft,
)

numerator = (real_pred.shape[0] // hop_length_fft) * 2
real_pred_db, real_pred_pha = numpy_audio_to_matrix_spectrogram(
    real_pred.reshape(1, real_pred.shape[0]),
    int(numerator / 2) + 1,
    # Same formula as the noisy call (previously plain `numerator`) so the two
    # spectrograms being compared are computed with matching parameters.
    numerator + 1,
    hop_length_fft,
)

plot_spectrogram(real_noisy_db[0, :, :], sr, hop_length_fft)
plot_spectrogram(real_pred_db[0, :, :], sr, hop_length_fft)
# Load the noisy recording at index 1 and the prediction at the same index,
# both requested at 8000 Hz, then embed an audio player for each.
# Paths are joined component by component so they are valid on any platform.
real_noisy, sr = librosa.load(
    os.path.join('..', 'data', 'validation', 'noisy_voice', noisy_voices_list[1]),
    sr=8000,
)
real_pred, sr = librosa.load(
    os.path.join('..', 'data', 'validation', 'save_prediction', pred_voices_list[1]),
    sr=8000,
)

# Play back at the sample rate returned by librosa.load rather than repeating
# the literal, so the two cannot drift apart.
print('Noisy voice')
ipd.display(ipd.Audio(real_noisy, rate=sr))
print('Predicted voice')
ipd.display(ipd.Audio(real_pred, rate=sr))
Noisy voice
Predicted voice
# Compute the dB spectrogram (and phase) of the noisy and predicted signals
# and plot both for visual comparison.
hop_length_fft = 63

# `numerator` is twice the number of whole hops that fit in the signal; the
# size arguments passed below are derived from it so they scale with the
# recording length.
# NOTE(review): the 2nd and 3rd arguments are presumably the spectrogram side
# length and n_fft - confirm against signal_utils.
numerator = (real_noisy.shape[0] // hop_length_fft) * 2
real_noisy_db, real_noisy_pha = numpy_audio_to_matrix_spectrogram(
    real_noisy.reshape(1, real_noisy.shape[0]),
    int(numerator / 2) + 1,
    numerator + 1,
    hop_length_fft,
)

numerator = (real_pred.shape[0] // hop_length_fft) * 2
real_pred_db, real_pred_pha = numpy_audio_to_matrix_spectrogram(
    real_pred.reshape(1, real_pred.shape[0]),
    int(numerator / 2) + 1,
    # Same formula as the noisy call (previously plain `numerator`) so the two
    # spectrograms being compared are computed with matching parameters.
    numerator + 1,
    hop_length_fft,
)

plot_spectrogram(real_noisy_db[0, :, :], sr, hop_length_fft)
plot_spectrogram(real_pred_db[0, :, :], sr, hop_length_fft)
# Load the noisy recording at index 2 and the prediction at the same index,
# both requested at 8000 Hz, then embed an audio player for each.
# Paths are joined component by component so they are valid on any platform.
real_noisy, sr = librosa.load(
    os.path.join('..', 'data', 'validation', 'noisy_voice', noisy_voices_list[2]),
    sr=8000,
)
real_pred, sr = librosa.load(
    os.path.join('..', 'data', 'validation', 'save_prediction', pred_voices_list[2]),
    sr=8000,
)

# Play back at the sample rate returned by librosa.load rather than repeating
# the literal, so the two cannot drift apart.
print('Noisy voice')
ipd.display(ipd.Audio(real_noisy, rate=sr))
print('Predicted voice')
ipd.display(ipd.Audio(real_pred, rate=sr))
Noisy voice
Predicted voice
# Compute the dB spectrogram (and phase) of the noisy and predicted signals
# and plot both for visual comparison.
hop_length_fft = 63

# `numerator` is twice the number of whole hops that fit in the signal; the
# size arguments passed below are derived from it so they scale with the
# recording length.
# NOTE(review): the 2nd and 3rd arguments are presumably the spectrogram side
# length and n_fft - confirm against signal_utils.
numerator = (real_noisy.shape[0] // hop_length_fft) * 2
real_noisy_db, real_noisy_pha = numpy_audio_to_matrix_spectrogram(
    real_noisy.reshape(1, real_noisy.shape[0]),
    int(numerator / 2) + 1,
    numerator + 1,
    hop_length_fft,
)

numerator = (real_pred.shape[0] // hop_length_fft) * 2
real_pred_db, real_pred_pha = numpy_audio_to_matrix_spectrogram(
    real_pred.reshape(1, real_pred.shape[0]),
    int(numerator / 2) + 1,
    # Same formula as the noisy call (previously plain `numerator`) so the two
    # spectrograms being compared are computed with matching parameters.
    numerator + 1,
    hop_length_fft,
)

plot_spectrogram(real_noisy_db[0, :, :], sr, hop_length_fft)
plot_spectrogram(real_pred_db[0, :, :], sr, hop_length_fft)
# Load the noisy recording at index 0 and the prediction at the same index,
# both requested at 8000 Hz, then embed an audio player for each.
# Paths are joined component by component so they are valid on any platform.
real_noisy, sr = librosa.load(
    os.path.join('..', 'data', 'validation', 'noisy_voice', noisy_voices_list[0]),
    sr=8000,
)
real_pred, sr = librosa.load(
    os.path.join('..', 'data', 'validation', 'save_prediction', pred_voices_list[0]),
    sr=8000,
)

# Play back at the sample rate returned by librosa.load rather than repeating
# the literal, so the two cannot drift apart.
print('Noisy voice')
ipd.display(ipd.Audio(real_noisy, rate=sr))
print('Predicted voice')
ipd.display(ipd.Audio(real_pred, rate=sr))
Noisy voice
Predicted voice
# Compute the dB spectrogram (and phase) of the noisy and predicted signals
# and plot both for visual comparison.
hop_length_fft = 63

# `numerator` is twice the number of whole hops that fit in the signal; the
# size arguments passed below are derived from it so they scale with the
# recording length.
# NOTE(review): the 2nd and 3rd arguments are presumably the spectrogram side
# length and n_fft - confirm against signal_utils.
numerator = (real_noisy.shape[0] // hop_length_fft) * 2
real_noisy_db, real_noisy_pha = numpy_audio_to_matrix_spectrogram(
    real_noisy.reshape(1, real_noisy.shape[0]),
    int(numerator / 2) + 1,
    numerator + 1,
    hop_length_fft,
)

numerator = (real_pred.shape[0] // hop_length_fft) * 2
real_pred_db, real_pred_pha = numpy_audio_to_matrix_spectrogram(
    real_pred.reshape(1, real_pred.shape[0]),
    int(numerator / 2) + 1,
    # Same formula as the noisy call (previously plain `numerator`) so the two
    # spectrograms being compared are computed with matching parameters.
    numerator + 1,
    hop_length_fft,
)

plot_spectrogram(real_noisy_db[0, :, :], sr, hop_length_fft)
plot_spectrogram(real_pred_db[0, :, :], sr, hop_length_fft)